Much of the code and many of the examples in this notebook are copied or adapted from
Blueprints for Text Analytics Using Python by Jens Albrecht, Sidharth Ramachandran, and Christian Winkler (O'Reilly, 2021), 978-1-492-07408-3.
%run "/code/source/config/notebook_settings.py"
from source.library.text_analysis import count_tokens, tf_idf, get_context_from_keyword, count_keywords, \
count_keywords_by, impurity
with Timer("Loading Data"):
path = 'artifacts/data/processed/un-general-debates-blueprint.pkl'
df = pd.read_pickle(path)
2023-02-26 22:48:04 - INFO | Timer Started: Loading Data 2023-02-26 22:48:06 - INFO | Timer Finished: (2.29 seconds)
This section provides a basic exploration of the text and dataset.
# Summary statistics (nulls, zeros, distribution moments, percentiles) for numeric columns.
hlp.pandas.numeric_summary(df)
| # of Non-Nulls | # of Nulls | % Nulls | # of Zeros | % Zeros | Mean | St Dev. | Coef of Var | Skewness | Kurtosis | Min | 10% | 25% | 50% | 75% | 90% | Max | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| session | 7,507 | 0 | 0.0% | 0 | 0.0% | 49.6 | 12.9 | 0.3 | -0.2 | -1.1 | 25 | 31.0 | 39.0 | 51.0 | 61.0 | 67.0 | 70 |
| year | 7,507 | 0 | 0.0% | 0 | 0.0% | 1,994.6 | 12.9 | 0.0 | -0.2 | -1.1 | 1,970 | 1,976.0 | 1,984.0 | 1,996.0 | 2,006.0 | 2,012.0 | 2,015 |
| text_length | 7,507 | 0 | 0.0% | 0 | 0.0% | 17,967.3 | 7,860.0 | 0.4 | 1.1 | 1.8 | 2,362 | 9,553.8 | 12,077.0 | 16,424.0 | 22,479.5 | 28,658.2 | 72,041 |
| num_tokens | 7,507 | 0 | 0.0% | 0 | 0.0% | 1,480.3 | 635.2 | 0.4 | 1.1 | 1.7 | 187 | 793.6 | 1,005.5 | 1,358.0 | 1,848.0 | 2,336.4 | 5,688 |
| num_bi_grams | 7,507 | 0 | 0.0% | 0 | 0.0% | 588.5 | 243.6 | 0.4 | 1.0 | 1.6 | 58 | 321.0 | 408.0 | 544.0 | 726.0 | 912.0 | 2,185 |
# Summary (nulls, most frequent value, uniqueness) for non-numeric columns.
hlp.pandas.non_numeric_summary(df)
| # of Non-Nulls | # of Nulls | % Nulls | Most Freq. Value | # of Unique | % Unique | |
|---|---|---|---|---|---|---|
| country | 7,507 | 0 | 0.0% | ALB | 199 | 2.7% |
| country_name | 7,507 | 0 | 0.0% | Albania | 199 | 2.7% |
| speaker | 7,507 | 0 | 0.0% | <unknown> | 5,429 | 72.3% |
| position | 7,507 | 0 | 0.0% | <unknown> | 114 | 1.5% |
| text | 7,507 | 0 | 0.0% | 33: May I first convey to our [...] | 7,507 | 100.0% |
| tokens | 7,507 | 0 | 0.0% | ['may', 'first', 'convey', 'pr[...] | 7,507 | 100.0% |
| bi_grams | 7,507 | 0 | 0.0% | ['first convey', 'albanian del[...] | 7,507 | 100.0% |
# Peek at the first two rows of the processed dataset.
df.head(2)
| session | year | country | country_name | speaker | position | text | text_length | tokens | num_tokens | bi_grams | num_bi_grams | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25 | 1970 | ALB | Albania | Mr. NAS | <unknown> | 33: May I first convey to our President the congratulations of the Albanian delegation on his election to the Presidency of the twenty-fifth session of the General Assembly?\n34.\tIn taking up the... | 51419 | [may, first, convey, president, congratulations, albanian, delegation, election, presidency, twenty-fifth, session, general, assembly, taking, work, agenda, twenty-, fifth, session, general, assem... | 4092 | [first convey, albanian delegation, twenty-fifth session, general assembly, twenty- fifth, fifth session, general assembly, twenty-fifth anniversary, united nations, peace-loving member, member st... | 1510 |
| 1 | 25 | 1970 | ARG | Argentina | Mr. DE PABLO PARDO | <unknown> | 177.\t : It is a fortunate coincidence that precisely at a time when the United Nations is celebrating its first twenty-five years of existence, an eminent jurist so closely linked to our Organiza... | 29286 | [fortunate, coincidence, precisely, time, united, nations, celebrating, first, twenty-five, years, existence, eminent, jurist, closely, linked, organization, elected, preside, general, assembly, b... | 2341 | [fortunate coincidence, united nations, first twenty-five, twenty-five years, eminent jurist, closely linked, general assembly, argentine government, excellency mr, mr president, felicitous choice... | 884 |
# Highest "impurity" score across all speeches (presumably the share of
# suspicious/non-text characters — see impurity() in text_analysis);
# a near-zero maximum suggests the raw text is already clean.
df['text'].apply(impurity).max()
0.0023444160272804776
# First 1,000 characters of the first speech (Albania, 1970), raw text.
df['text'].iloc[0][0:1000]
'33: May I first convey to our President the congratulations of the Albanian delegation on his election to the Presidency of the twenty-fifth session of the General Assembly?\n34.\tIn taking up the work on the agenda of the twenty- fifth session of the General Assembly, which is being held on the eve of the twenty-fifth anniversary of the coming into force of the Charter of the United Nations, the peace-loving Member States would have wished to be in a position to present on this occasion some picture of positive and satisfactory activity on the part of the United Nations. The Albanian delegation, for its part, would have taken great pleasure in drawing up such a balance sheet of activities covering a quarter of a century, which is certainly no short period in the life of an international organization. Unfortunately, this is not the situation. Created on the day after victory had been achieved over the Powers of the Rome BerlinTokyo Axis and conceived in the spirit of the principles which'
# Same speech after tokenization (stop words appear to have been removed
# during preprocessing), '|'-joined so token boundaries are visible.
'|'.join(df['tokens'].iloc[0])[0:1000]
'may|first|convey|president|congratulations|albanian|delegation|election|presidency|twenty-fifth|session|general|assembly|taking|work|agenda|twenty-|fifth|session|general|assembly|held|eve|twenty-fifth|anniversary|coming|force|charter|united|nations|peace-loving|member|states|would|wished|position|present|occasion|picture|positive|satisfactory|activity|part|united|nations|albanian|delegation|part|would|taken|great|pleasure|drawing|balance|sheet|activities|covering|quarter|century|certainly|short|period|life|international|organization|unfortunately|situation|created|day|victory|achieved|powers|rome|berlintokyo|axis|conceived|spirit|principles|predominated|war|antifascist|coalition|organization|awakened|whole|progressive|humanity|hope|would|serve|important|factor|creating|better|international|conditions|order|favor|cause|freedom|peace|world|security|activities|number|events|occurred|world|arena|period|disappointed|hopes|peoples|united|nations|far|contributed|required|fundamental|provision'
# Same speech's precomputed bi-grams, '|'-joined.
'|'.join(df['bi_grams'].iloc[0])[0:1000]
'first convey|albanian delegation|twenty-fifth session|general assembly|twenty- fifth|fifth session|general assembly|twenty-fifth anniversary|united nations|peace-loving member|member states|states would|satisfactory activity|united nations|albanian delegation|part would|taken great|great pleasure|balance sheet|activities covering|short period|international organization|organization unfortunately|situation created|rome berlintokyo|berlintokyo axis|antifascist coalition|organization awakened|progressive humanity|would serve|important factor|creating better|better international|international conditions|freedom peace|world security|world arena|period disappointed|united nations|nations far|fundamental provisions|international peace|liberation struggle|imperialist powers|united states|america foremost|foremost among|path diametrically|diametrically opposed|instrument favoring|pillage oppression|peace-loving peoples|united nations|committing aggression|many parts|frequently helped|direction '
Explore idiosyncrasies of various columns — e.g., the same speaker can be represented in multiple ways (different spellings and honorifics).
# Data-quality issue: the same person appears under several spellings/honorifics.
df[df['speaker'].str.contains('Bush')]['speaker'].value_counts()
George W. Bush 4 Mr. George W. Bush 2 Bush 1 George Bush 1 Mr. George W Bush 1 Name: speaker, dtype: int64
# Top 20 tokens by corpus-wide frequency.
count_tokens(df['tokens']).head(20)
| frequency | |
|---|---|
| token | |
| nations | 124508 |
| united | 120763 |
| international | 117223 |
| world | 89421 |
| countries | 85734 |
| peace | 72625 |
| development | 62632 |
| states | 59713 |
| people | 59338 |
| security | 58425 |
| economic | 53833 |
| must | 51880 |
| also | 47403 |
| new | 44453 |
| country | 40878 |
| assembly | 40413 |
| community | 38294 |
| government | 37430 |
| general | 37138 |
| organization | 36434 |
# Distribution of speech length in characters: a box plot for a compact
# overview, then a histogram for the full shape of the distribution.
box_ax = df['text_length'].plot(kind='box', vert=False, figsize=(10, 1))
box_ax.set_title("Distribution of Text Length")
box_ax.set_xlabel("# of Characters")
box_ax.set_yticklabels([])
box_ax;
hist_ax = df['text_length'].plot(kind='hist', bins=60, figsize=(10, 2));
hist_ax.set_title("Distribution of Text Length")
hist_ax.set_xlabel("# of Characters")
hist_ax;
import seaborn as sns

# Histogram + KDE of speech lengths, then per-country box and violin plots
# for a handful of selected countries.
sns.displot(df['text_length'], bins=60, kde=True, height=3, aspect=3);
selected = df['country'].isin(['USA', 'FRA', 'GBR', 'CHN', 'RUS'])
for plot_kind in ('box', 'violin'):
    grid = sns.catplot(data=df[selected], x="country", y="text_length", kind=plot_kind)
    grid.fig.set_size_inches(6, 3)
    grid.fig.set_dpi(100)
# Sanity check: at most one speech per (year, country) pair.
assert not df[['year', 'country']].duplicated().any()
# Number of speeches (one per participating country) per year.
df.groupby('year').size().plot(title="Number of Countries");
# Average speech length over time; fixed y-limits keep the scale stable.
# Parenthesized chaining replaces the PEP8-discouraged backslash continuations.
(
    df
    .groupby('year')
    .agg({'text_length': 'mean'})
    .plot(title="Avg. Speech Length", ylim=(0, 30000))
);
# Corpus-wide token frequencies, kept in a variable for the word cloud below.
counts_df = count_tokens(df['tokens'])
counts_df.head()
| frequency | |
|---|---|
| token | |
| nations | 124508 |
| united | 120763 |
| international | 117223 |
| world | 89421 |
| countries | 85734 |
def plot_wordcloud(frequency_dict):
    """
    Render a word cloud from a ``{token: weight}`` mapping.

    Args:
        frequency_dict: mapping of token -> relative importance (e.g. raw
            frequency or TF-IDF score); larger values render as larger words.

    Returns:
        The matplotlib Axes containing the rendered cloud, so callers can
        customize it further (e.g. set a title).
    """
    wc = wordcloud.WordCloud(
        background_color='white',
        colormap='tab20b',
        width=round(hlpp.STANDARD_WIDTH * 100),
        height=round(hlpp.STANDARD_HEIGHT * 100),
        max_words=200,
        max_font_size=150,
        random_state=42,  # fixed seed so the layout is reproducible across runs
    )
    wc.generate_from_frequencies(frequency_dict)
    fig, ax = plt.subplots(figsize=(hlpp.STANDARD_WIDTH, hlpp.STANDARD_HEIGHT))
    ax.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    return ax
# Word cloud weighted by raw corpus token frequencies.
plot_wordcloud(counts_df.to_dict()['frequency']);
# Corpus-wide TF-IDF over uni-grams (no segmentation); infrequent tokens
# are dropped via the two minimum-frequency thresholds.
tf_idf_df = tf_idf(
    df=df,
    tokens_column='tokens',
    segment_columns=None,
    min_frequency_corpus=20,
    min_frequency_document=20,
)
# Top 30 uni-grams by raw corpus frequency.
# Parenthesized chaining replaces the PEP8-discouraged backslash continuations.
ax = (
    tf_idf_df
    .sort_values(by='frequency', ascending=False)
    .head(30)[['frequency']]
    .plot(kind='barh', width=0.99)
)
ax.set_title("Frequency of Uni-Grams")
ax.set_xlabel("Frequency")
ax.invert_yaxis();
# Top 30 uni-grams by TF-IDF (tf_idf() appears to return rows already sorted
# by tf-idf — head(30) relies on that; confirm in text_analysis), followed by
# a TF-IDF-weighted word cloud.
ax = tf_idf_df.head(30)[['tf-idf']].plot(kind='barh', width=0.99)
ax.set_title("TF-IDF of Uni-Grams")
ax.set_xlabel("TF-IDF")
ax.invert_yaxis();
plot_wordcloud(tf_idf_df.to_dict()['tf-idf']);
# Recompute TF-IDF with one segment per year; thresholds are lower because
# each yearly segment is much smaller than the whole corpus.
tf_idf_per_year = tf_idf(
    df=df,
    tokens_column='tokens',
    segment_columns='year',
    min_frequency_corpus=10,
    min_frequency_document=10,
)
# Top uni-grams of 1970, with session-number boilerplate filtered out,
# shown as a bar chart and as a word cloud.
stop_words = ['twenty-fifth', 'twenty-five', 'twenty', 'fifth']
year_1970 = tf_idf_per_year.query('year == 1970').reset_index()
year_1970 = year_1970[~year_1970.token.isin(stop_words)]
ax = year_1970.head(30).set_index('token')[['tf-idf']].plot(kind='barh', width=0.99)
ax.set_title("TF-IDF of Uni-Grams - 1970")
ax.set_xlabel("TF-IDF")
ax.invert_yaxis();
weights_1970 = year_1970[['token', 'tf-idf']].set_index('token')
plot_wordcloud(weights_1970.to_dict()['tf-idf']);
# Same view for 2015, with the "seventieth" session boilerplate removed.
stop_words = ['seventieth']
year_2015 = tf_idf_per_year.query('year == 2015').reset_index()
year_2015 = year_2015[~year_2015.token.isin(stop_words)]
ax = year_2015.head(30).set_index('token')[['tf-idf']].plot(kind='barh', width=0.99)
ax.set_title("TF-IDF of Uni-Grams - 2015")
ax.set_xlabel("TF-IDF")
ax.invert_yaxis();
weights_2015 = year_2015[['token', 'tf-idf']].set_index('token')
plot_wordcloud(weights_2015.to_dict()['tf-idf']);
# Keyword-in-context: sampled windows around "sdgs" in 2015 speeches
# (fixed seed keeps the sample reproducible).
contexts = get_context_from_keyword(
    documents=df[df['year'] == 2015]['text'],
    window_width=50,
    keyword='sdgs', random_seed=42
)
for context in contexts:
    print(context)
-29946 03/10/2015 A/70/PV.28 Development Goals ( |SDGs| ). Beyond the immediate present, the Commonwealth nd collectively, to ensure the realization of the |SDGs| . The Commonwealth of Dominica is therefore encour that and other initiatives aimed at achieving the |SDGs| . SIDS are equally concerned about sustainably uti n of the post-2015 Sustainable Development Goals ( |SDGs| ). I should like to take this opportunity to conve nian State. We must ensure the realization of the |SDGs| in a timely manner. We hope that the upcoming cli s to translate the Sustainable Development Goals ( |SDGs| ) 15-29664 21/25 A/70/PV.23 01/10/2015 of the ne w Agenda into action. That can happen only if the |SDGs| are matched with the necessary resources, in acco , smart and innovatively to achieve the 17 global |SDGs| and the 169 targets. It has to be driven by a new t for our rural population in accordance with the |SDGs| . The leaders of the Pacific small island developi of the gender-related Goals and targets under the |SDGs| . In 2009, Solomon Islands, as an archipelagic Sta
# Context windows around "sids" (small island developing States) in 2015.
contexts = get_context_from_keyword(
    documents=df[df['year'] == 2015]['text'],
    window_width=50,
    keyword='sids', random_seed=42
)
for context in contexts:
    print(context)
ntry, our region, small island developing States ( |SIDS| ) and our planet. We decided then, as we have done to our planet, with a disproportionate impact on |SIDS| . Our concerns have been magnified since 27 August of climate change has wreaked havoc on Caribbean |SIDS| . In 2004, Hurricane Ivan, a category 3 system, wi mpact of climate change and provide resources for |SIDS| to respond to those challenges. The recently conc ean energy. Just a few days ago, on 27 September, |SIDS| demonstrated their leadership, by creating the fi the first-ever intergovernmental organization for |SIDS| , the Small Island Developing States Sustainable E Developing States Sustainable Energy Initiative ( |SIDS| DOCK). The statute came into force with a members inable energy and climate-resilient organization. |SIDS| DOCK is intended to provided a collective voice, this first-ever intergovernmental organization of |SIDS| . 15-29946 11/33 A/70/PV.28 03/10/2015 On behalf .28 03/10/2015 On behalf of the States members of |SIDS| DOCK, we wish to express our gratitude to our par
# Context windows around "pv" — these turn out to be UN document identifiers
# (A/70/PV.xx) embedded in the text, i.e. transcription noise.
contexts = get_context_from_keyword(
    documents=df[df['year'] == 2015]['text'],
    window_width=50,
    keyword='pv', random_seed=42
)
for context in contexts:
    print(context)
reed Sustainable 10/33 15-29946 03/10/2015 A/70/ |PV| .28 Development Goals (SDGs). Beyond the immediate ental organization of SIDS. 15-29946 11/33 A/70/ |PV| .28 03/10/2015 On behalf of the States members of lic of China has 12/33 15-29946 03/10/2015 A/70/ |PV| .28 brought about major improvements in our countr g and United Nations reform. 15-29822 5/29 A/70/ |PV| .26 02/10/2015 Two years ago in this very forum (s 0/2015 Two years ago in this very forum (see A/68/ |PV| .22), the Philippines outlined the five pillars th ly engaged in the 6/29 15-29822 02/10/2015 A/70/ |PV| .26 United Nations system. Indeed, the partnership ersations on the much-needed 15-29822 7/29 A/70/ |PV| .26 02/10/2015 and long-overdue reforms to make th iation to the United Nations 15-29822 3/29 A/70/ |PV| .26 02/10/2015 Secretary-General for his leadershi onal institutions 4/29 15-29822 02/10/2015 A/70/ |PV| .26 such as ASEAN should be be further strengthene ne refugee issue 26/54 15-29562 30/09/2015 A/70/ |PV| .19 in accordance with resolution 194 (III) and th
# Corpus-wide TF-IDF over the precomputed bi-grams.
tf_idf_df = tf_idf(
    df=df,
    tokens_column='bi_grams',
    segment_columns=None,
    min_frequency_corpus=20,
    min_frequency_document=20,
)
# Top 30 bi-grams by raw corpus frequency.
ax = tf_idf_df.sort_values(by='frequency', ascending=False).head(30)[['frequency']].plot(kind='barh', width=0.99)
ax.set_title("Frequency of Bi-Grams")
ax.set_xlabel("Frequency")
ax.invert_yaxis();
# Top 30 bi-grams by TF-IDF.
ax = tf_idf_df.head(30)[['tf-idf']].plot(kind='barh', width=0.99)
ax.set_title("TF-IDF of Bi-Grams")
ax.set_xlabel("TF-IDF")
ax.invert_yaxis();
# Per-year TF-IDF over bi-grams; thresholds are lower still because any
# individual bi-gram is rarer than a uni-gram.
tf_idf_per_year = tf_idf(
    df=df,
    tokens_column='bi_grams',
    segment_columns='year',
    min_frequency_corpus=3,
    min_frequency_document=3,
)
# Bi-gram word cloud for 1970, minus session-anniversary boilerplate.
stop_words = ['twenty-fifth anniversary', 'twenty-five years', 'twenty years', 'twenty fifth']
bigrams_1970 = tf_idf_per_year.query('year == 1970').reset_index()
bigrams_1970 = bigrams_1970[~bigrams_1970.token.isin(stop_words)]
bigram_weights = bigrams_1970[['token', 'tf-idf']].set_index('token')
plot_wordcloud(bigram_weights.to_dict()['tf-idf']);
# Bi-gram word cloud for 2015, minus the seventieth-anniversary boilerplate.
stop_words = ['seventieth anniversary']
bigrams_2015 = tf_idf_per_year.query('year == 2015').reset_index()
bigrams_2015 = bigrams_2015[~bigrams_2015.token.isin(stop_words)]
bigram_weights = bigrams_2015[['token', 'tf-idf']].set_index('token')
plot_wordcloud(bigram_weights.to_dict()['tf-idf']);
# All years' bi-grams containing "climate" (anniversary boilerplate removed).
# The dead commented-out single-year filter has been deleted.
stop_words = ['seventieth anniversary']
tokens_to_show = tf_idf_per_year.reset_index()
tokens_to_show = tokens_to_show[~tokens_to_show.token.isin(stop_words)]
# keep only bi-grams mentioning "climate"
tokens_to_show = tokens_to_show[tokens_to_show['token'].str.contains('climate')]
tokens_to_show
| year | token | frequency | tf-idf | |
|---|---|---|---|---|
| 1286 | 1970 | political climate | 5 | 16.98 |
| 4895 | 1971 | political climate | 5 | 16.98 |
| 6127 | 1971 | international climate | 3 | 9.91 |
| 6579 | 1972 | political climate | 15 | 50.94 |
| 7822 | 1972 | international climate | 7 | 23.12 |
| ... | ... | ... | ... | ... |
| 207523 | 2015 | climate finance | 3 | 20.16 |
| 207525 | 2015 | fight climate | 3 | 20.16 |
| 207821 | 2015 | climate system | 3 | 18.57 |
| 209036 | 2015 | new climate | 3 | 13.05 |
| 209374 | 2015 | international climate | 3 | 9.91 |
332 rows × 4 columns
# Word cloud weighted by TF-IDF of the climate-related bi-grams above.
tokens_to_show = tokens_to_show[['token', 'tf-idf']].set_index('token')
tokens_to_show = tokens_to_show.to_dict()['tf-idf']
plot_wordcloud(tokens_to_show);
# Yearly occurrence counts for a few topical uni-gram keywords.
keyword_count_over_time = count_keywords_by(
    df=df,
    by='year',
    tokens='tokens',
    keywords=['nuclear', 'terrorism', 'climate', 'freedom'],
)
keyword_count_over_time.head()
| nuclear | terrorism | climate | freedom | |
|---|---|---|---|---|
| year | ||||
| 1970 | 192 | 7 | 18 | 128 |
| 1971 | 275 | 9 | 35 | 205 |
| 1972 | 393 | 379 | 74 | 280 |
| 1973 | 347 | 124 | 79 | 266 |
| 1974 | 478 | 24 | 71 | 316 |
# Line chart of yearly keyword counts. The trailing semicolon suppresses the
# stray `Text(...)` repr that set_ylabel otherwise echoes into the notebook.
ax = keyword_count_over_time.plot(kind='line')
ax.set_title("Keyword count over time")
ax.set_ylabel("# of Occurrences");
Text(0, 0.5, '# of Occurrences')
# Yearly occurrence counts for selected bi-gram keywords.
keyword_count_over_time = count_keywords_by(
    df=df,
    by='year',
    tokens='bi_grams',
    keywords=['climate change', 'human rights', 'middle east'],
)
keyword_count_over_time.head()
| climate change | human rights | middle east | |
|---|---|---|---|
| year | |||
| 1970 | 0 | 143 | 190 |
| 1971 | 0 | 119 | 323 |
| 1972 | 0 | 124 | 282 |
| 1973 | 0 | 200 | 365 |
| 1974 | 0 | 127 | 458 |
# Line chart of the bi-gram keyword counts. The trailing semicolon suppresses
# the stray `Text(...)` repr that set_ylabel otherwise echoes into the notebook.
ax = keyword_count_over_time.plot(kind='line')
ax.set_title("Keyword count over time")
ax.set_ylabel("# of Occurrences");
Text(0, 0.5, '# of Occurrences')
# Context windows around "human rights" in speeches up to 1980.
contexts = get_context_from_keyword(
    documents=df[df['year'] <= 1980]['text'],
    window_width=50,
    keyword='human rights', random_seed=42
)
for context in contexts:
    print(context)
e United Nations and the Universal Declaration of |Human Rights| and a threat to international peace and security. y of the adoption of the Universal Declaration of |Human Rights| . Bahrain is looking forward to participating ii t Bahrain attaches great importance to safeguarding |human rights| and fundamental freedoms for all peoples. It is s laim to be among those who laid the foundation of |human rights| , while at the same time they support racist regim y them their fundamental freedoms and inalienable |human rights| . 114. Colonialism and apartheid are crimes agains ity and would be compromised. 183. In the area of |human rights| there is already talk of a so-called third genera s already talk of a so-called third generation of |human rights| . At the same time we must note with dismay that i eas of the world not even the first generation of |human rights| —that is, those basic rights flowing from the conc lementation of the so-called second generation of |human rights| , that is, those basic rights which flow from the s of the world. All those refugees are victims of |human rights| violations, both those who seek asylum as the res
# Yearly counts for a broader basket of topical keywords.
keywords = [
    'terrorism', 'terrorist', 'nuclear',
    'war', 'oil', 'syria', 'syrian',
    'refugees', 'migration', 'peacekeeping', 'humanitarian',
    'climate', 'change', 'sustainable', 'sdgs',
]
freq_df = count_keywords_by(df, by='year', tokens='tokens', keywords=keywords)
freq_df.head()
| terrorism | terrorist | nuclear | war | oil | syria | syrian | refugees | migration | peacekeeping | humanitarian | climate | change | sustainable | sdgs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | |||||||||||||||
| 1970 | 7 | 9 | 192 | 360 | 1 | 7 | 15 | 16 | 1 | 45 | 28 | 18 | 57 | 0 | 0 |
| 1971 | 9 | 8 | 275 | 468 | 10 | 16 | 21 | 161 | 1 | 17 | 64 | 35 | 94 | 0 | 0 |
| 1972 | 379 | 75 | 393 | 562 | 12 | 38 | 32 | 41 | 1 | 5 | 42 | 74 | 124 | 0 | 0 |
| 1973 | 124 | 19 | 347 | 612 | 24 | 45 | 14 | 29 | 0 | 5 | 51 | 79 | 157 | 0 | 0 |
| 1974 | 24 | 13 | 478 | 466 | 247 | 28 | 10 | 55 | 2 | 23 | 46 | 71 | 187 | 0 | 0 |
# Total tokens spoken per year (denominator for relative frequencies below).
df.groupby('year')['num_tokens'].sum().head()
year 1970 137933 1971 228954 1972 244654 1973 242385 1974 258090 Name: num_tokens, dtype: int64
# Normalize counts by the total number of tokens spoken each year, so trends
# are not driven by varying overall speech volume.
freq_df = freq_df.div(df.groupby('year')['num_tokens'].sum(), axis=0)
# Square root as a sublinear transform for better contrast in the heatmap.
# np.sqrt(frame) is vectorized and equivalent to frame.apply(np.sqrt).
freq_df = np.sqrt(freq_df)
freq_df.head()
| terrorism | terrorist | nuclear | war | oil | syria | syrian | refugees | migration | peacekeeping | humanitarian | climate | change | sustainable | sdgs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | |||||||||||||||
| 1970 | 0.01 | 0.01 | 0.04 | 0.05 | 0.00 | 0.01 | 0.01 | 0.01 | 0.00 | 0.02 | 0.01 | 0.01 | 0.02 | 0.00 | 0.00 |
| 1971 | 0.01 | 0.01 | 0.03 | 0.05 | 0.01 | 0.01 | 0.01 | 0.03 | 0.00 | 0.01 | 0.02 | 0.01 | 0.02 | 0.00 | 0.00 |
| 1972 | 0.04 | 0.02 | 0.04 | 0.05 | 0.01 | 0.01 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.02 | 0.02 | 0.00 | 0.00 |
| 1973 | 0.02 | 0.01 | 0.04 | 0.05 | 0.01 | 0.01 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.02 | 0.03 | 0.00 | 0.00 |
| 1974 | 0.01 | 0.01 | 0.04 | 0.04 | 0.03 | 0.01 | 0.01 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.03 | 0.00 | 0.00 |
# Heatmap of the sublinearly-scaled relative keyword frequencies
# (keywords on rows, years on columns).
plt.figure(figsize=(10, 3))
sns.set(font_scale=1)
sns.heatmap(data=freq_df.T, xticklabels=True, yticklabels=True, cbar=False, cmap="Reds")
sns.set(font_scale=1);